import os

import pymysql
import requests
from bs4 import BeautifulSoup


class Word():
    """Crawl the idiom listing pages of chengyu.911cha.com into MySQL.

    For every pinyin initial in ``self.pinyin`` the crawler walks the paginated
    listing (``pinyin_<letter>.html``, ``pinyin_<letter>_p<N>.html``), saves
    the raw HTML of each page under ``word_dir`` and inserts the link texts
    matched by ``ul.center a`` into the ``word`` table.
    """

    www_url = 'https://chengyu.911cha.com/'
    start_html = 'https://chengyu.911cha.com/pinyin_a.html'
    # Directory the raw HTML of every fetched page is written to.
    word_dir = './word'
    # Seconds to wait for the server before a request is abandoned.
    timeout = 10
    # Value stored in the ``create_time`` column of every inserted row.
    create_time = '2021-06-22'

    def __init__(self):
        """Set up the list of initials and open the database connection."""
        self.pinyin = list('abcdefghijklmnopqrstuvwxyz')
        # NOTE(review): credentials are hard-coded; consider loading them from
        # configuration or the environment.
        self.conn = pymysql.connect(host='localhost', user="root", password="123456", database="huibo_test")
        # Initial currently being crawled and the page number within it.
        self.nowA = None
        self.nowNum = 1

    def runPinyin(self):
        """Crawl every initial in ``self.pinyin``, then close the connection.

        The connection is closed even when a page fails, so a ``Word``
        instance is single-use: create a new one to crawl again.
        """
        try:
            for letter in self.pinyin:
                self.nowNum = 1
                self.run(letter)
        finally:
            self.close()

    def run(self, zimu):
        """Crawl all listing pages of one initial, starting at ``self.nowNum``.

        Args:
            zimu: The pinyin initial (a single letter) whose pages are fetched.

        Each page is downloaded, saved to ``word_dir``, parsed, and its words
        are inserted. Paging continues while the pagination links on the
        current page advertise a page number greater than ``self.nowNum``.
        A page answered with a non-2xx status ends the crawl of this initial.
        Network errors raised by ``requests`` propagate to the caller.
        """
        self.nowA = zimu
        # Headers that imitate a regular browser session.
        kv = {
            'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36',
            'referer': self.www_url,
            'cookie': 'Hm_lvt_dbc355aef238b6c32b43eacbbf161c3c=1575636408; Hm_lpvt_dbc355aef238b6c32b43eacbbf161c3c=1575636892'}

        # open(..., 'w') does not create missing directories.
        os.makedirs(self.word_dir, exist_ok=True)

        # Iterate instead of recursing once per page: no recursion-depth
        # limit and no parsed documents kept alive on the call stack.
        while True:
            if self.nowNum == 1:
                page_name = 'pinyin_' + zimu + '.html'
            else:
                page_name = 'pinyin_' + zimu + '_p' + str(self.nowNum) + '.html'

            # www_url already ends with '/'; normalise so the request URL
            # never contains a double slash.
            page_url = self.www_url.rstrip('/') + '/' + page_name
            res = requests.get(page_url, headers=kv, timeout=self.timeout)
            if not res.ok:
                # Do not store or parse an error page as if it were a listing.
                print('skip ' + page_url + ': HTTP ' + str(res.status_code))
                break
            res.encoding = 'utf-8'
            html = res.text

            # Keep a copy of the raw page on disk.
            with open(os.path.join(self.word_dir, page_name), mode='w', encoding='utf8') as file:
                file.write(html)

            soup = BeautifulSoup(html, 'html.parser')

            # Highest page number advertised by the pagination links.
            # isdecimal() matches exactly what int() can parse; isnumeric()
            # would also accept text such as '一' or '½' and make int() raise.
            link_texts = (a.getText().strip() for a in soup.select("div .gclear.center a"))
            page_numbers = [int(text) for text in link_texts if text.isdecimal()]
            max_num = max(page_numbers, default=1)

            # One row per idiom link on the page.
            word_list = [(a.getText(), self.create_time) for a in soup.select("ul.center a")]
            self.insertWord(word_list)

            # Stop once the listing shows no further page for this initial.
            if max_num <= self.nowNum:
                break
            self.nowNum = self.nowNum + 1

    def insertWord(self, word_list):
        """Insert one page of words and commit.

        Args:
            word_list: Sequence of ``(word, create_time)`` tuples.

        The connection stays open so later pages can reuse it; call
        ``close()`` when crawling is finished. An empty list is a no-op.
        On failure the transaction is rolled back and the error re-raised.
        """
        if not word_list:
            return

        sql = "insert into word (word,create_time)  values(%s,%s)"
        print(word_list)
        cursor = self.conn.cursor()
        try:
            cursor.executemany(sql, word_list)
            self.conn.commit()
        except Exception:
            self.conn.rollback()
            raise
        finally:
            cursor.close()

    def close(self):
        """Close the database connection if it is still open."""
        if self.conn.open:
            self.conn.close()

if __name__ == "__main__":
    # Script entry point: build the crawler and walk every pinyin initial.
    Word().runPinyin()
